# clean ITPAJD Data
# -------------------------------------------------------------------- #
# prelims ####
# -------------------------------------------------------------------- #
rm(list = ls())
library(haven)# to import/export to stata
library(rgdal)
library(raster)
library(readxl) # load data
library(classInt)
library(Hmisc) # for labels
library(GISTools) # work with shape files
library(tidyverse)
library(units)
library(sf) # simple features

# paths
  # main directory
dir <- paste0("PATH_TO_MAIN_DIRECTORY_HERE")
setwd(dir)
  # ATC
path0912 <- paste0(dir,"orig/atc/OMIC_ATC_2009_2012")
path1316 <- paste0(dir,"orig/atc/OMIC_ATC_2013_2016")
path1719 <- paste0(dir,"orig/atc/OMIC_ATC_2017_2019")
  # Ajuntament de Barcelona
SecCenPath <- paste0(dir,"orig/aj_barcelona/0301040100_BCN_UNITATS_ADM/0301040100_SecCens_ADM_ETRS89.shp")
  # Idescat
idescat <- paste0(dir,"orig/idescat/Seccions Censals 2002-2016/")

# -------------------------------------------------------------------- #
# clean itp data ####
# -------------------------------------------------------------------- #

# 2009_2012
ITP_2009_2012 <- st_read(stringsAsFactors = FALSE,
                   dsn = path.expand(sprintf("%s", path0912, "OMIC_ATC_2009_2012"))) %>%
  st_transform('+proj=utm +zone=31 +ellps=GRS80 +units=m +no_defs')
# 2013-2016
ITP_2013_2016 <- st_read(stringsAsFactors = FALSE,
                   dsn = path.expand(sprintf("%s", path1316, "OMIC_ATC_2013_2016"))) %>%
  st_transform('+proj=utm +zone=31 +ellps=GRS80 +units=m +no_defs')
# 2017-2019
ITP_2017_2019 <- st_read(stringsAsFactors = FALSE,
                         dsn = path.expand(sprintf("%s", path1719, "OMIC_ATC_2017_2019"))) %>%
  st_transform('+proj=utm +zone=31 +ellps=GRS80 +units=m +no_defs') 
ITP_2017_2019$OBJECTID <-  seq.int(nrow(ITP_2017_2019)) 

# get list of unique PlotCodes and their Geometries
ITP_Geom <- rbind(ITP_2009_2012[c("PARCELLA","geometry")],ITP_2013_2016[c("PARCELLA","geometry")],ITP_2017_2019[c("PARCELLA","geometry")]) %>%
  distinct(PARCELLA, .keep_all = TRUE) 

# -------------------------------------------------------------------- #
# add census tract ####
# -------------------------------------------------------------------- #

# years
firstyear <- 2016
lastyear <- 2002

# census tracts change over time
for(year in firstyear:lastyear){
  
  # path to the census tract in the year
  cenfile <- paste0("bseccenv10sh1f1_", year,"0101_0")
  cenpath <- paste0(idescat, cenfile)

  # load census data
  CensShp <- st_read(stringsAsFactors = FALSE,
                  dsn = path.expand(sprintf("%s", cenpath, cenfile)))  %>%
    st_transform('+proj=utm +zone=31 +ellps=GRS80 +units=m +no_defs')

  # rename
  CensShp$CPRO <- substr(CensShp$MUNICIPI,start = 1,stop = 2)
  CensShp$CMUN <- substr(CensShp$MUNICIPI,start = 3,stop = 5)
  CensShp$CCA <- 09
  names(CensShp)[names(CensShp)=="MUNICIPI"] <- "CMUN_DC"
  names(CensShp)[names(CensShp)=="DISTRICTE"] <- "CDIS"
  names(CensShp)[names(CensShp)=="SECCIO"] <- "CSEC"
  CensShp$CUSEC <- paste0(CensShp$CPRO,CensShp$CMUN,CensShp$CDIS,CensShp$CSEC)

  # Intersect ATC-ITP data with census data to recover census tract
  SecData <- st_intersection(ITP_Geom, CensShp)

  # remove geometry
  SecData$geometry <- NULL

  # add year
  SecData$year <- year
  
  # remove duplicates
  SecData <- distinct(SecData, PARCELLA, CUSEC, .keep_all = TRUE)
  
  # append data with different years  
  if(year == firstyear){ 
    SecDataYears <- SecData
  }else{ 
    SecDataYears <- rbind(SecDataYears, SecData)
  }

  # remove files to save space
  rm(list=c("SecData","CensShp"))

}

# store data R and stata
filename_R <- paste0(dir,"data/int/ATC-ITP_Parcela_Census.RData")
filename_dta <- paste0(dir, "data/int/ATC-ITP_Parcela_Census.dta")

write_dta(data = SecDataYears, filename_dta)
save(SecDataYears, file = filename_R)

# -------------------------------------------------------------------- #
# add barris ####
# -------------------------------------------------------------------- #

varscensus <- c('BARRI','AEB','ZUA','LITERAL')
## Cartography of Districts and Census Tracts
GisDataSecCen <- st_read(dsn = SecCenPath) %>%
  select(c(varscensus))
# get coordinates of census tract
CRS <- st_crs(GisDataSecCen)

# -------------------------------------------------------------------- #
# export ####
# -------------------------------------------------------------------- #
# put variables together
ITP_Data <- rbind(ITP_2009_2012,ITP_2013_2016,ITP_2017_2019) 

# get census tracts
ITP_Data$year <- ITP_Data$ANYMERITAC
ITP_Data$year[ITP_Data$year>2016] <- 2016
ITP_Data <- merge(x = ITP_Data, y = SecDataYears, by = c("PARCELLA","year"), all.x = TRUE)
names(ITP_Data)[names(ITP_Data)=="year"] <- "CensusYear"
# change coordinate system
ITP_Data <- ITP_Data %>% st_transform(CRS)

# get BARRI
Y <- st_intersection(ITP_Data, GisDataSecCen) %>%
  select(c("OBJECTID","PARCELLA","DATAMERITA","BARRI","AEB","ZUA","LITERAL"))
Y$geometry <- NULL

# merge
ITP_Data <- merge(x = ITP_Data, y = Y, by = c("OBJECTID","PARCELLA","DATAMERITA"), all.x = TRUE, all.y = FALSE)

# remove some variables and geometry
ITP_Data <- ITP_Data [, !names(ITP_Data) %in% c("DATAEXTRAC")]
st_geometry(ITP_Data) <- NULL

# store data R and stata
filename_R <- paste0(dir,"data/int/ATC-ITP_2009_19", ".RData")
filename_dta <- paste0(dir,"data/int/ATC-ITP_2009_19", ".dta")

write_dta(data = ITP_Data, filename_dta)
save(ITP_Data, file = filename_R)

# -------------------------------------------------------------------- #
# closing ####
# -------------------------------------------------------------------- #
rm(list = ls())
